Introduction:
- Data Preparation:
# Import the combined 2015-2022 World Happiness Report data, standardise the
# column names, and drop the index column `x`.
whr2015_2022 <- read.csv(
  here::here("data/world-happiness-report-2015-2022-cleaned.csv"),
  stringsAsFactors = FALSE
) %>%
  janitor::clean_names() %>%
  select(-x)

# Replace decimal commas with dots and strip asterisks in a single pass.
# Assigning through `[]` keeps the object a data frame of plain character
# columns on every R version; rebuilding it with data.frame(lapply(...)) would
# create factors on R < 4.0, and as.numeric() on a factor returns level codes
# rather than the parsed numbers.
# NOTE: the replacement is applied to every column, text columns included, so
# a comma inside a country or region name also becomes a dot.
whr2015_2022[] <- lapply(
  whr2015_2022,
  function(column) gsub("\\*", "", gsub(",", ".", column, fixed = TRUE))
)

# Convert the measurement columns (now clean character strings) to numeric.
columns_to_convert <- c(
  "happiness_rank",
  "happiness_score",
  "economy_gdp_per_capita",
  "family_social_support",
  "health_life_expectancy",
  "freedom",
  "trust_government_corruption",
  "generosity",
  "year"
)
whr2015_2022[columns_to_convert] <- lapply(
  whr2015_2022[columns_to_convert],
  as.numeric
)
# Country aliases found in the data (names) and the single spelling each one
# is consolidated to (values).
# NOTE(review): if the comma-to-dot cleaning step has already run on the
# country column, the comma variant of the Hong Kong alias can no longer
# occur; the dotted variants ("S.A.R.. China", "Eswatini. Kingdom of") are the
# forms that cleaning produces.
country_name_mapping <- c(
  "Taiwan Province of China" = "Taiwan",
  "Hong Kong S.A.R. of China" = "Hong Kong",
  "Hong Kong S.A.R., China" = "Hong Kong",
  "Hong Kong S.A.R.. China" = "Hong Kong",
  "Czechia" = "Czech Republic",
  "North Macedonia" = "Macedonia",
  "Trinidad & Tobago" = "Trinidad and Tobago",
  "North Cyprus" = "Northern Cyprus",
  "Somaliland region" = "Somalia",
  "Somaliland Region" = "Somalia",
  "Palestinian Territories" = "Palestine",
  "Eswatini. Kingdom of" = "Swaziland"
)

# Rewrite every alias in the country column to its consolidated spelling.
whr2015_2022[["country"]] <- mapvalues(
  whr2015_2022[["country"]],
  from = names(country_name_mapping),
  to = country_name_mapping
)

# Region aliases (names) and the spelling each one is consolidated to (values).
region_name_mapping <- c(
  "Eastern Asia" = "East Asia",
  "Southeastern Asia" = "Southeast Asia",
  "Southern Asia" = "South Asia",
  "Middle East and Northern Africa" = "Middle East and North Africa"
)

# Rewrite every alias in the region column to its consolidated spelling.
whr2015_2022[["region"]] <- mapvalues(
  whr2015_2022[["region"]],
  from = names(region_name_mapping),
  to = region_name_mapping
)
# Region overrides: country (name) -> region (value) to force for that country
# in every year of the data.
# The former "North Cyprus" = "Western Asia or Europe" entry was removed: the
# country-name consolidation already renames "North Cyprus" to
# "Northern Cyprus", so the entry could never match, and its region was not one
# of the region labels used anywhere else in this table.
correct_regions <- c(
  "Armenia" = "Central and Eastern Europe",
  "Australia" = "Australia and New Zealand",
  "Taiwan" = "East Asia",
  "Belize" = "Latin America and Caribbean",
  "Hong Kong" = "East Asia",
  "Somalia" = "Sub-Saharan Africa",
  "Namibia" = "Sub-Saharan Africa",
  "South Sudan" = "Sub-Saharan Africa",
  "Trinidad and Tobago" = "Latin America and Caribbean",
  "Macedonia" = "Central and Eastern Europe",
  "Gambia" = "Sub-Saharan Africa",
  "Luxembourg" = "Western Europe",
  "Czech Republic" = "Central and Eastern Europe",
  "Guatemala" = "Latin America and Caribbean",
  "Kuwait" = "Middle East and North Africa",
  "Belarus" = "Central and Eastern Europe",
  "Turkmenistan" = "Central and Eastern Europe",
  "Libya" = "Middle East and North Africa",
  "Azerbaijan" = "Central and Eastern Europe",
  "Liberia" = "Sub-Saharan Africa",
  "Congo" = "Sub-Saharan Africa",
  "Niger" = "Sub-Saharan Africa",
  "Comoros" = "Sub-Saharan Africa",
  "Palestine" = "Middle East and North Africa",
  "Swaziland" = "Sub-Saharan Africa",
  "Madagascar" = "Sub-Saharan Africa",
  "Chad" = "Sub-Saharan Africa",
  "Yemen" = "Middle East and North Africa",
  "Mauritania" = "Sub-Saharan Africa",
  "Lesotho" = "Sub-Saharan Africa",
  "Botswana" = "Sub-Saharan Africa",
  "Rwanda" = "Sub-Saharan Africa",
  "Canada" = "North America",
  "Georgia" = "Central and Eastern Europe",
  "Kazakhstan" = "Central and Eastern Europe",
  "Kyrgyzstan" = "Central and Eastern Europe",
  "Moldova" = "Central and Eastern Europe",
  "New Zealand" = "Australia and New Zealand",
  "Russia" = "Central and Eastern Europe",
  "Tajikistan" = "Central and Eastern Europe",
  "Ukraine" = "Central and Eastern Europe",
  "United States" = "North America",
  "Uzbekistan" = "Central and Eastern Europe",
  "Northern Cyprus" = "Western Europe"
)

# Apply all overrides in one vectorised step. match() yields NA for countries
# without an override (and for NA country values), and those rows are left
# untouched.
override_index <- match(whr2015_2022$country, names(correct_regions))
has_override <- !is.na(override_index)
whr2015_2022$region[has_override] <-
  unname(correct_regions[override_index[has_override]])

# View the updated data
glimpse(whr2015_2022)
## Rows: 1,229
## Columns: 11
## $ happiness_rank <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,…
## $ country <chr> "Switzerland", "Iceland", "Denmark", "Norw…
## $ region <chr> "Western Europe", "Western Europe", "Weste…
## $ happiness_score <dbl> 7.587, 7.561, 7.527, 7.522, 7.427, 7.406, …
## $ economy_gdp_per_capita <dbl> 1.39651, 1.30232, 1.32548, 1.45900, 1.3262…
## $ family_social_support <dbl> 1.34951, 1.40223, 1.36058, 1.33095, 1.3226…
## $ health_life_expectancy <dbl> 0.94143, 0.94784, 0.87464, 0.88521, 0.9056…
## $ freedom <dbl> 0.66557, 0.62877, 0.64938, 0.66973, 0.6329…
## $ trust_government_corruption <dbl> 0.41978, 0.14145, 0.48357, 0.36503, 0.3295…
## $ generosity <dbl> 0.29678, 0.43630, 0.34139, 0.34699, 0.4581…
## $ year <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, …
- Data exploration:
Data summary

|                          |              |
|:-------------------------|:-------------|
| Name                     | whr2015_2022 |
| Number of rows           | 1229         |
| Number of columns        | 11           |
| _______________________  |              |
| Column type frequency:   |              |
| character                | 2            |
| numeric                  | 9            |
| ________________________ |              |
| Group variables          | None         |

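Variable type: character

| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|:--------------|----------:|--------------:|----:|----:|------:|---------:|-----------:|
| country       |         0 |             1 |   4 |  24 |     0 |      165 |          0 |
| region        |         0 |             1 |   9 |  28 |     0 |       10 |          0 |
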
Variable type: numeric

| skim_variable               | n_missing | complete_rate |    mean |    sd |     p0 |     p25 |     p50 |     p75 |    p100 | hist  |
|:----------------------------|----------:|--------------:|--------:|------:|-------:|--------:|--------:|--------:|--------:|:------|
| happiness_rank              |         0 |             1 |   77.47 | 44.47 |    1.0 |   39.00 |   77.00 |  116.00 |  158.00 | ▇▇▇▇▇ |
| happiness_score             |         0 |             1 |    5.43 |  1.12 |    2.4 |    4.58 |    5.41 |    6.22 |    7.84 | ▁▅▇▇▃ |
| economy_gdp_per_capita      |         0 |             1 |    0.98 |  0.43 |    0.0 |    0.67 |    1.01 |    1.30 |    2.21 | ▃▅▇▃▁ |
| family_social_support       |         0 |             1 |    1.03 |  0.33 |    0.0 |    0.83 |    1.07 |    1.27 |    1.64 | ▁▂▆▇▅ |
| health_life_expectancy      |         0 |             1 |    0.61 |  0.24 |    0.0 |    0.44 |    0.64 |    0.79 |    1.14 | ▂▅▇▇▂ |
| freedom                     |         0 |             1 |    0.44 |  0.15 |    0.0 |    0.34 |    0.46 |    0.56 |    0.74 | ▁▃▆▇▃ |
| trust_government_corruption |         0 |             1 |    0.13 |  0.11 |    0.0 |    0.06 |    0.10 |    0.16 |    0.59 | ▇▃▁▁▁ |
| generosity                  |         0 |             1 |    0.20 |  0.12 |    0.0 |    0.12 |    0.19 |    0.26 |    0.84 | ▇▇▂▁▁ |
| year                        |         0 |             1 | 2018.45 |  2.28 | 2015.0 | 2016.00 | 2018.00 | 2020.00 | 2022.00 | ▇▃▇▃▇ |

- Data visualization:
# One row per country: the mean happiness score over all years in the data.
# The map title covers 2015-2022, but the raw data holds one row per country
# per year, and a country polygon can only show a single value.
# NOTE(review): joinCountryData2Map appears to keep only the first matching
# row per country, so un-aggregated input would effectively plot one arbitrary
# year - confirm against the rworldmap documentation.
country_data <- aggregate(
  happiness_score ~ country,
  data = whr2015_2022,
  FUN = mean
)
names(country_data) <- c("country", "value")

# Define the colors for the low, mid, and high values
low_color <- "#FF9999" # Softer red
mid_color <- "#FFFF99" # Softer yellow
high_color <- "#99CC99" # Softer green

# Create the color palette function
cols <- colorRampPalette(c(low_color, mid_color, high_color))

# One colour per map category. The palette was previously sized with
# length(whr2015_2022), which is the number of COLUMNS of the data frame and
# unrelated to the number of categories drawn on the map.
num_map_categories <- 7
palette_colors <- cols(num_map_categories)

# Join the country values onto the world map. The join prints a matching
# report; nullfile() discards it on every platform (the literal 'NUL' only
# works on Windows and creates a file called NUL elsewhere).
capture.output(
  n <- joinCountryData2Map(
    country_data,
    joinCode = "NAME",
    nameJoinColumn = "country"
  ),
  file = nullfile()
)

# Draw the happiness choropleth plus the Low/Medium/High legend on the
# currently active graphics device.
#   map_data: map object returned by joinCountryData2Map(), holding a `value`
#             column.
#   palette:  vector of fill colours, one per category.
#   num_cats: number of categories the values are split into.
draw_happiness_map <- function(map_data,
                               palette = palette_colors,
                               num_cats = num_map_categories) {
  mapCountryData(
    map_data,
    nameColumnToPlot = "value",
    mapTitle = "World Map for Happiness Score 2015-2022",
    colourPalette = palette,
    numCats = num_cats,
    oceanCol = "#F0F8FF",
    missingCountryCol = "#CCCCCCCC",
    addLegend = TRUE,
    aspect = 1.1,
    borderCol = "Black",
    lwd = 0.1
  )
  legend(
    "bottom", # Adjust position as needed
    legend = c("Low", "Medium", "High"), # Example categories
    fill = c(low_color, mid_color, high_color), # Corresponding colors
    title = "Happiness Score", # Title of the legend
    cex = 0.8 # Adjust text size as needed
  )
  invisible(NULL)
}

# Output plot in pdf
pdf("world_happiness_map.pdf", width = 10, height = 7)
draw_happiness_map(n)
# dev.off() prints the newly active device; discard that output.
capture.output(dev.off(), file = nullfile())

# Output plot in R console
draw_happiness_map(n)

# Plot 1: happiness score against GDP per capita, with a loess trend line.
# geom_smooth() has no `lm` argument; the smoother is chosen with `method`.
# The previous `lm = loess` was ignored as an unknown parameter, so the
# default smoother was drawn instead of loess.
ggplot(whr2015_2022, aes(x = economy_gdp_per_capita, y = happiness_score)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x) +
  labs(
    title = "GDP per capita vs. Happiness Score",
    x = "GDP per capita",
    y = "Happiness Score"
  )

# Plot 2: happiness score against healthy life expectancy, with a loess trend
# line. geom_smooth() has no `lm` argument; the smoother is chosen with
# `method`. The previous `lm = loess` was ignored as an unknown parameter, so
# the default smoother was drawn instead of loess.
ggplot(whr2015_2022, aes(x = health_life_expectancy, y = happiness_score)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x) +
  labs(
    title = "Health (Life expectancy) vs. Happiness Score",
    x = "Health (Life expectancy)",
    y = "Happiness Score"
  )

# Plot 3: distribution of freedom within one-point-wide bands of happiness
# score. Both variables are continuous, so without an explicit grouping
# geom_boxplot() collapses everything into a single box; cut_width() bins the
# x axis so that each band gets its own box.
ggplot(whr2015_2022, aes(x = happiness_score, y = freedom)) +
  geom_boxplot(aes(group = cut_width(happiness_score, 1))) +
  labs(
    title = "Happiness Score vs. Freedom",
    x = "Happiness Score",
    y = "Freedom"
  )

# Keep only the 2022 observations.
whr2022 <- whr2015_2022 %>%
  filter(year == 2022)

# Select top 10 and bottom 10 countries based on happiness score.
# head()/tail() on seq_len(n()) stay valid when fewer than 20 rows exist, and
# union() prevents a row from being selected twice in that case.
top10_bottom10_countries <- whr2022 %>%
  arrange(desc(happiness_score)) %>%
  slice(union(head(seq_len(n()), 10), tail(seq_len(n()), 10)))

# Bubble chart of happiness against GDP per capita for the selected countries.
# Bubble size follows the happiness score and colour follows the region; the
# dashed lines at x = 1.4 and y = 5 split the panel into quadrants, which the
# annotations label.
ggplot(top10_bottom10_countries) +
  geom_point(
    aes(
      x = economy_gdp_per_capita,
      y = happiness_score,
      size = happiness_score,
      colour = factor(region)
    ),
    # Constant transparency belongs outside aes(); inside aes() it is treated
    # as data and rescaled by an alpha scale instead of being used as 0.85.
    alpha = 0.85
  ) +
  scale_size_continuous(range = c(2, 15)) +
  geom_vline(xintercept = 1.4, colour = "#f7347a", linetype = "longdash") +
  geom_hline(yintercept = 5, colour = "#f7347a", linetype = "longdash") +
  geom_text(
    aes(x = economy_gdp_per_capita, y = happiness_score, label = country),
    hjust = "left",
    vjust = "bottom",
    check_overlap = TRUE,
    size = 3
  ) +
  theme(legend.position = "none") +
  # The title now matches the selection above (10 + 10 countries, not 5 + 5).
  labs(
    title = "Happiness vs. GDP per capita for Top 10 and Bottom 10 countries in 2022",
    x = "GDP per capita",
    y = "Happiness score"
  ) +
  annotate("text", x = 0.83, y = 5.2, family = "Helvetica", size = 2.7, color = "gray20",
           label = "Lower GDP per capita") +
  annotate("text", x = 1.95, y = 5.2, family = "Helvetica", size = 2.7, color = "gray20",
           label = "Higher GDP per capita") +
  annotate("text", x = 1.53, y = 2.3, family = "Helvetica", size = 2.7, color = "gray20",
           label = "Lower Happiness") +
  annotate("text", x = 1.53, y = 8, family = "Helvetica", size = 2.7, color = "gray20",
           label = "Higher Happiness")

# Getting top 10 countries. Rank 1 is the happiest; min_rank() keeps the ranks
# whole numbers even when scores tie (rank() would produce values like 1.5).
whr2015_2022_top10 <- whr2022 %>%
  slice_max(happiness_score, n = 10) %>%
  mutate(
    cat = "top_10",
    country_rank = min_rank(desc(happiness_score)),
    country_label = paste0(country, " (", country_rank, ")")
  )

# Getting bottom 10 countries. Ranks are computed over all 2022 countries
# before slicing, with rank 1 being the lowest score.
whr2015_2022_bottom10 <- whr2022 %>%
  mutate(
    country_rank = min_rank(happiness_score),
    country_label = paste0(country, " (", country_rank, ")")
  ) %>%
  slice_min(happiness_score, n = 10) %>%
  mutate(cat = "bottom_10")

# Build one horizontal rounded-bar ranking chart on a 0-10 scale.
#   data:          data frame with `country_label` and `happiness_score`.
#   descending:    FALSE orders the bars by ascending score along the flipped
#                  axis, TRUE by descending score.
#   low_fill,
#   high_fill:     end colours of the diverging fill gradient (midpoint 5,
#                  white in the middle).
#   plot_title,
#   plot_subtitle: title and subtitle text.
# Returns the ggplot object.
plot_happiness_ranking <- function(data,
                                   descending,
                                   low_fill,
                                   high_fill,
                                   plot_title,
                                   plot_subtitle) {
  order_sign <- if (descending) -1 else 1

  ggplot(data, aes(x = reorder(country_label, order_sign * happiness_score))) +
    # Full-length background bar (y = 10). Its fill value of 4.9 sits just
    # below the gradient midpoint of 5, i.e. close to the white mid colour.
    geom_chicklet(aes(y = 10, fill = 4.9), width = 0.5, radius = grid::unit(5, "pt")) +
    # Foreground bar: the actual score, coloured by the score.
    geom_chicklet(aes(y = happiness_score, fill = happiness_score), width = 0.5, radius = grid::unit(5, "pt")) +
    # The label is mapped inside aes() so it always follows the rows of `data`
    # rather than depending on an external vector.
    geom_text(aes(y = happiness_score, label = round(happiness_score, 2)), nudge_y = 0.4, size = 3) +
    scale_y_continuous(expand = c(0, 0.1), position = "right", limits = c(0, 10)) +
    scale_fill_gradient2(low = low_fill, high = high_fill, mid = "white", midpoint = 5) +
    coord_flip() +
    labs(
      y = "Best possible life = 10", x = "",
      title = plot_title,
      subtitle = plot_subtitle,
      caption = "Source: The World Happiness Report 2022"
    ) +
    theme_ipsum(grid = "") +
    theme(
      plot.title = element_text(size = 15),
      plot.subtitle = element_text(size = 12),
      plot.caption = element_text(size = 10),
      axis.title.x = element_text(size = 10, color = "#555955"),
      axis.text.y = element_text(size = 10, color = "black"),
      axis.text.x = element_blank(),
      # "none" is the documented value, matching the other plots in this file.
      legend.position = "none"
    )
}

# Plotting top 10 happiest countries
top_10 <- plot_happiness_ranking(
  whr2015_2022_top10,
  descending = FALSE,
  low_fill = "black",
  high_fill = "#818aeb",
  plot_title = "Top 10 Happiest Countries in 2022",
  plot_subtitle = "8 of the happiest countries present in Europe"
)

# Plotting 10 saddest countries
bottom_10 <- plot_happiness_ranking(
  whr2015_2022_bottom10,
  descending = TRUE,
  low_fill = "#074040",
  high_fill = "#4cc2c2",
  plot_title = "Top 10 Saddest Countries in 2022",
  plot_subtitle = "Ordered from saddest to less sad"
)

# Displaying plots side by side
top_10 + bottom_10

# Creating a new variable for sorted regions: each 2022 row gets its region's
# mean happiness score, and `region_sorted` is the region as a factor ordered
# by that mean.
whr2022_sorted <- whr2022 %>%
  group_by(region) %>%
  mutate(avg_happiness = mean(happiness_score)) %>%
  ungroup() %>%
  mutate(region_sorted = reorder(region, avg_happiness))

# Plotting with regions sorted by happiness score: one beeswarm point per
# country with a box plot per region drawn over it.
# The transparencies are constants and therefore sit outside aes(). Inside
# aes() the former `alpha = 1` / `alpha = 2` were treated as data and rescaled
# by an alpha scale (and 2 is not a valid alpha value), which left the points
# faint underneath opaque boxes.
region_level <- ggplot(
  whr2022_sorted,
  aes(x = region_sorted, y = happiness_score, fill = region_sorted, text = country)
) +
  geom_beeswarm(aes(color = region_sorted), alpha = 1) +
  labs(
    title = "Country-wise happiness trends in world regions",
    x = "Region",
    y = "Happiness score"
  ) +
  geom_hline(yintercept = 5, colour = "#f7347a", linetype = "longdash") +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 0, hjust = 1, size = 8)
  ) +
  scale_x_discrete(labels = wrap_format(10)) +
  scale_fill_brewer(palette = "Spectral") +
  scale_color_brewer(palette = "Spectral") +
  # Semi-transparent boxes so the country points underneath stay visible.
  geom_boxplot(alpha = 0.5)

# Convert to ggplotly with tooltips
ggplotly(region_level, tooltip = c("country", "happiness_score"))